# === setup === #
library(here)
source(here("GitControlled/Codes/0_1_ls_packages.R"))
source(here("GitControlled/Codes/0_0_functions.R"))
# /*===== Data =====*/
reg_data <-
here("Shared/Data/WaterAnalysis/comp_reg_dt.rds") %>%
readRDS() %>%
.[year %in% 2008:2012 & usage <= 40,] %>%
.[,tr_year:=factor(paste0(tr,year))] %>%
.[,trs_year:=factor(paste0(trs,year))]
- NOTE
- When aggregating the data, we need to carefully deal with the data
related to a township located in Gosper county in TB
(
tr=="5_22). This township is designated as a phase 3
groundwater quantity management area which requires allocation of
pumping. Groundwater pumping was limited to a total of 27 inches-acre
for 2009-2011.
# reg_data[tr=="5_22", ]
# reg_data[tr=="5_22", wellid] %>% unique() %>% length()
agg_reg_data <-
copy(reg_data) %>%
.[tr!="5_22",] %>%
.[,.(
sum_usage = sum(usage),
mean_usage = mean(usage),
treat2 = mean(treat2),
# --- soil --- #
silttotal_r = mean(silttotal_r),
claytotal_r = mean(claytotal_r),
slope_r = mean(slope_r),
ksat_r = mean(ksat_r),
awc_r = mean(awc_r),
# --- weather --- #
sum_pr_in = sum(pr_in),
mean_pr_in = mean(pr_in),
sum_pet_in = sum(pet_in),
mean_pet_in = mean(pet_in),
sum_gdd_in = sum(gdd_in),
mean_gdd_in = mean(gdd_in),
# --- tr --- #
tr = tr
),by=wellid] %>%
unique(.,by="wellid")
Well-year level
Data
# /*===== control variables =====*/
# remove gdd_in
cov_ls <- c(
# --- weather --- #
"pr_in","pet_in",
# --- soil --- #
"silttotal_r", "claytotal_r", "slope_r", "ksat_r", "awc_r"
)
data <- reg_data
Y <- data[, usage]
T <- data[, treat2]
X <- data[, ..cov_ls]
cl_var <- data[,tr_year]
#/*--------------------------------*/
#' ## 1st CF
#/*--------------------------------*/
set.seed(23456)
forest_W <- regression_forest(X, T, num.trees = 4000, clusters = cl_var)
W_hat <- predict(forest_W)$predictions
forest_Y <- regression_forest(X, Y, num.trees = 4000, clusters = cl_var)
Y_hat <- predict(forest_Y)$predictions
cf_raw <- causal_forest(
X=X,
Y=Y,
W=T,
# Y.hat = Y_hat,
# W.hat = W_hat,
clusters = cl_var,
tune.parameters = "all",
num.trees = 4000
)
varimp = variable_importance(cf_raw)
selected_vars = which(varimp > mean (varimp))
# selected_vars = which(varimp/mean(varimp) > 0.2)
se_cov <- data[,cov_ls[selected_vars], with=FALSE]
vis_cf_raw <- gen_impact_viz(
cf_res= cf_raw,
data_base=data,
treat_var='treat2',
var_ls= cov_ls,
var_ls_int = cov_ls
)
vis_cf_raw

Well level Data
Sum
cov_ls <- c(
# --- weather --- #
"sum_pr_in","sum_pet_in",
# --- soil --- #
"silttotal_r", "claytotal_r", "slope_r", "ksat_r", "awc_r"
)
data <- agg_reg_data
Y <- data[, sum_usage]
T <- data[, treat2]
X <- data[, ..cov_ls]
cl_var <- data[,tr] %>% factor()
#/*--------------------------------*/
#' ## 1st CF
#/*--------------------------------*/
set.seed(23456)
forest_W <- regression_forest(X, T, num.trees = 4000, clusters = cl_var)
W_hat <- predict(forest_W)$predictions
forest_Y <- regression_forest(X, Y, num.trees = 4000, clusters = cl_var)
Y_hat <- predict(forest_Y)$predictions
cf_raw <- causal_forest(
X=X,
Y=Y,
W=T,
Y.hat = Y_hat,
W.hat = W_hat,
clusters = cl_var,
tune.parameters = "all",
num.trees = 4000
)
varimp = variable_importance(cf_raw)
selected_vars = which(varimp > mean (varimp))
# selected_vars = which(varimp/mean(varimp) > 0.2)
se_cov <- data[,cov_ls[selected_vars], with=FALSE]
vis_cf_raw <- gen_impact_viz(
cf_res= cf_raw,
data_base=data,
treat_var='treat2',
var_ls= cov_ls,
var_ls_int = cov_ls
)
vis_cf_raw

Mean
cov_ls <- c(
# --- weather --- #
"mean_pr_in","mean_pet_in",
# --- soil --- #
"silttotal_r", "claytotal_r", "slope_r", "ksat_r", "awc_r"
)
data <- agg_reg_data
Y <- data[, mean_usage]
T <- data[, treat2]
X <- data[, ..cov_ls]
cl_var <- data[,tr] %>% factor()
#/*--------------------------------*/
#' ## 1st CF
#/*--------------------------------*/
set.seed(23456)
forest_W <- regression_forest(X, T, num.trees = 4000, clusters = cl_var)
W_hat <- predict(forest_W)$predictions
forest_Y <- regression_forest(X, Y, num.trees = 4000, clusters = cl_var)
Y_hat <- predict(forest_Y)$predictions
cf_raw <- causal_forest(
X=X,
Y=Y,
W=T,
Y.hat = Y_hat,
W.hat = W_hat,
clusters = cl_var,
tune.parameters = "all",
num.trees = 4000
)
varimp = variable_importance(cf_raw)
selected_vars = which(varimp > mean (varimp))
# selected_vars = which(varimp/mean(varimp) > 0.2)
se_cov <- data[,cov_ls[selected_vars], with=FALSE]
vis_cf_raw <- gen_impact_viz(
cf_res= cf_raw,
data_base=data,
treat_var='treat2',
var_ls= cov_ls,
var_ls_int = cov_ls
)
vis_cf_raw
